## clear environment
rm(list = ls())

## load packages
library(sf) # vector spatial data
library(raster) # continuous spatial data
library(dplyr) # data manipulation
library(ggplot2) # plotting
library(xtable) # tex tables for export

## custom blank theme
theme_rw <- function() {
  theme_bw() +
    theme(plot.background = element_blank(), panel.grid.minor = element_blank(),
          panel.grid.major = element_blank(), panel.border = element_blank())
  
}

## called via namespace instead of loaded
## - meltt
## - mwa
## - lubridate
## - units



## load data ####

## load ACLED
ACLED <- read.csv('Data/events/Africa_1997-2018_upd-Jul23.csv')

## load GED
load('Data/events/ged172.Rdata')

## load population data
pop_dens <- raster('Data/rasters/pop_dens.tif')
pop_tot <- raster('Data/rasters/pop_count.tif')

## load nightlights
nl <- raster('Data/rasters/nightlights.tif')

## load shapefiles
cshapes <- st_read('Data/cshapes', 'cshapes')
sl <- readRDS('Data/subnational_boundaries/gadm36_SLE_2_sf.rds')

## load road data
roads <- st_read('Data/roads',
                 'A_and_B_all_utm29_distance_aggregated_to_AandB_junctions')

## load diamond data
diamonds <- st_read('Data/diamonds/dDIAL.shp')

## load one sided violence data
OSV <- read.csv('Data/events/ucdp-onesided-181.csv')

## load taxonomies for meltt
taxonomies <- lapply(list.files('Taxonomies/', full.names = T), read.csv)
names(taxonomies) <- c('actor_tax', 'event_tax', 'prec_tax')

## correct source names to uppercase in event taxonomy
taxonomies$actor_tax$data.source <- toupper(taxonomies$actor_tax$data.source)



## meltt preparation ####

# for events
names(GED)[names(GED)=='type_of_vi'] = 'event_tax'
names(ACLED)[names(ACLED)=='EVENT_TYPE'] = 'event_tax'

# for actors
names(GED)[names(GED)=='side_a'] = 'actor_tax' # given UCPD dyad conventions
names(ACLED)[names(ACLED)=='ACTOR1'] = 'actor_tax'

# for precision
names(GED)[names(GED)=='where_prec'] = 'prec_tax'
names(ACLED)[names(ACLED)=='GEO_PRECISION'] = 'prec_tax'

# Cleaning UCDP-GED
GED$date_start = as.Date(GED$date_start)
names(GED)[names(GED)=='date_start'] = 'date'
GED$date_end = as.Date(GED$date_end)
names(GED)[names(GED)=='date_end'] = 'enddate'

# Cleaning ACLED
colnames(ACLED) = tolower(colnames(ACLED))
ACLED$date = as.Date(ACLED$event_date)

## limit to sierra leone
ACLED <- ACLED %>% filter(country == 'Sierra Leone',
                          event_tax == 'Violence against civilians',
                          year >= 1997, year <= 2001)
GED <- GED %>% filter(country == 'Sierra Leone',
                      event_tax == 3, year >= 1997, year <= 2001,
                      !grepl('Government', actor_tax))

## combine events
output <- meltt::meltt(ACLED, GED, taxonomies = taxonomies, twindow = 10, spatwindow = 10)

## get number of duplicated events
nrow(output$processed$event_matched)

## get number of total input events
nrow(output$processed$complete_index)

## calculate percent overlap between datasets
nrow(output$processed$event_matched) / nrow(output$processed$deduplicated_index)

## extract combined civilian targeting events
civ_comb <- meltt::meltt_data(output)

## reload ACLED for capitalized variable names
ACLED <- read.csv('Data/events/Africa_1997-2018_upd-Jul23.csv', stringsAsFactors = F)



## spatial data prep ####

## subset population data to polygon
pop_dens <- crop(pop_dens, cshapes %>% filter(CNTRY_NAME == 'Sierra Leone'))
pop_tot <- crop(pop_tot, cshapes %>% filter(CNTRY_NAME == 'Sierra Leone'))
nl <- crop(nl, cshapes %>% filter(CNTRY_NAME == 'Sierra Leone'))

## create sf object for capital
capital <- cshapes %>% filter(CNTRY_NAME == 'Sierra Leone') %>%
  st_set_geometry(NULL) %>% 
  st_as_sf(coords = c('CAPLONG', 'CAPLAT'), crs = 4326)

## project roads to WGS84
roads <- st_transform(roads, st_crs(cshapes))

## susbet diamonds to sierra leone
diamonds <- diamonds %>% filter(COUNTRY == 'Sierra Leone')



## event data prep ####

## subset to sierra leone
ACLED <- ACLED %>% filter(COUNTRY == 'Sierra Leone', YEAR %in% 1997:2001)

## limit to battles and exclude state perpetrated violence against civilians; drop
## intra-group conflicts; consolidate factional sub-actors into single actors
battles <- ACLED %>%
  filter(EVENT_TYPE %in% c('Battle-No change of territory',
                           'Battle-Non-state actor overtakes territory'),
         !(grepl('Military|Police', ACTOR1, ignore.case = T) &
             EVENT_TYPE == 'Violence against civilians'),
         ACTOR1 != ACTOR2) %>%
  mutate(ACTOR1 = ifelse(grepl('Kamajor Militia', ACTOR1),
                         'Kamajor Militia', ACTOR1),
         ACTOR1 = ifelse(grepl('AFRC: Armed Forces Revolutionary Council', ACTOR1),
                         'AFRC: Armed Forces Revolutionary Council', ACTOR1),
         ACTOR1 = ifelse(grepl('RUF: Revolutionary United Front', ACTOR1),
                         'RUF: Revolutionary United Front', ACTOR1),
         ACTOR2 = ifelse(grepl('Kamajor Militia', ACTOR2),
                         'Kamajor Militia', ACTOR2),
         ACTOR2 = ifelse(grepl('AFRC: Armed Forces Revolutionary Council', ACTOR2),
                         'AFRC: Armed Forces Revolutionary Council', ACTOR2),
         ACTOR2 = ifelse(grepl('RUF: Revolutionary United Front', ACTOR2),
                         'RUF: Revolutionary United Front', ACTOR2))

## limit events to largest rebel groups
battles <- battles %>% filter(ACTOR1 %in% c('Kamajor Militia',
                                          'AFRC: Armed Forces Revolutionary Council',
                                          'RUF: Revolutionary United Front') |
                              ACTOR2 %in% c('Kamajor Militia',
                                            'AFRC: Armed Forces Revolutionary Council',
                                            'RUF: Revolutionary United Front'))

## limit events to RUF
battles <- battles %>% filter(ACTOR1 == 'RUF: Revolutionary United Front' |
                              ACTOR2 == 'RUF: Revolutionary United Front') %>%
  mutate(timestamp = lubridate::ymd(EVENT_DATE), lat = LATITUDE, lon = LONGITUDE) %>%
  dplyr::select(timestamp, lat, lon, EVENT_TYPE) %>%
  filter(lubridate::year(timestamp) %in% 1997:2001)



## attributed RUF attacks only ####

## get violence against civilians
civs_RUF <- civ_comb %>%
  filter(grepl('RUF', actor_tax)) %>% 
  mutate(EVENT_TYPE = 'Violence against civilians',
         lat = latitude, lon = longitude,
         timestamp = date) %>% 
  dplyr::select(timestamp, lat, lon, EVENT_TYPE)

## combine treatment, control, and dependent events
mw_data_RUF <- rbind(battles, civs_RUF)

## create sf of events to measure spatial covariates
mw_data_sf_RUF <- st_as_sf(mw_data_RUF, coords = c('lon', 'lat'),
                           crs = 4326, agr = 'identity')

## measure spatial covariates
mw_data_RUF <- mw_data_RUF %>%
  mutate(pop_dens = log(raster::extract(pop_dens, mw_data_sf_RUF)),
         pop_tot = log(raster::extract(pop_tot, mw_data_sf_RUF)),
         nl = raster::extract(nl, mw_data_sf_RUF),
         cap_dist = log(units::set_units(st_distance(mw_data_sf_RUF, capital, by_element = T), km)),
         road_dist = log(units::set_units(apply(st_distance(mw_data_sf_RUF, roads), 1, min), km)),
         dia_dist = log(units::set_units(apply(st_distance(mw_data_sf_RUF, diamonds), 1, min), km)),
         year = lubridate::year(timestamp))

## carry out matched wake analysis; past treatment control control; weighted
mw_RUF_wg_tcm <- mwa::matchedwake(mw_data_RUF, t_window = c(5, 60, 5), spat_window = c(2, 10, 2),
                             treatment = c('EVENT_TYPE',
                                           'Battle-Non-state actor overtakes territory'),
                             control = c('EVENT_TYPE',
                                         'Battle-No change of territory'),
                             dependent = c('EVENT_TYPE',
                                           'Violence against civilians'),
                             matchColumns = c('pop_dens', 'nl', 'cap_dist',
                                              'road_dist', 'dia_dist'),
                             TCM = T, weighted = T, alpha2 = .05)



## table 1 ####

## main results table
sum_mw_RUF_wg_tcm <- mwa:::summary.matchedwake(mw_RUF_wg_tcm)
colnames(sum_mw_RUF_wg_tcm)[colnames(sum_mw_RUF_wg_tcm) %in%
                              c('EffectSize',
                                'p.value',
                                'adj.Rsquared')] <- c('Effect Size',
                                                      'p-value',
                                                      'Adj. R$^2$')
print(sum_mw_RUF_wg_tcm)

## figure 4 ####

## main results plot
mwa:::plot.matchedwake(mw_RUF_wg_tcm)


## figure 2 ####

## histrogram of events
ggplot(mw_data_RUF, aes(x = timestamp)) +
  geom_histogram() +
  facet_wrap(~ EVENT_TYPE) +
  labs(x = '', y = 'Count') +
  theme_rw()


## figure 3 ####

## map - color
ggplot(mw_data_sf_RUF, aes(fill = EVENT_TYPE, color = EVENT_TYPE)) +
  geom_sf(data = sl, inherit.aes = F, fill = NA, color = 'gray30') +
  geom_sf(data = roads, inherit.aes = F, color = 'gray60', linetype = 'dotted') +
  geom_sf(data = capital, inherit.aes = F, fill = NA, shape = 5, size = 3,
          stroke = 2, color = 'darkorchid2') +
  geom_sf(data = diamonds, inherit.aes = F, color = 'gray35', shape = 18,
          size = 2.5) +
  geom_sf(alpha = .4) +
  coord_sf(crs = st_crs(mw_data_sf_RUF), datum = NA) +
  scale_color_discrete(name = '') +
  scale_fill_discrete(name = '') +
  theme_rw() +
  theme(legend.position = 'bottom') +
  guides(fill = guide_legend(nrow = 1, title.position = 'left'),
         color = guide_legend(nrow = 1, title.position = 'left'))

## map - black and white
ggplot(mw_data_sf_RUF, aes(fill = EVENT_TYPE, color = EVENT_TYPE)) +
  geom_sf(data = sl, inherit.aes = F, fill = NA, color = 'gray30') +
  geom_sf(data = roads, inherit.aes = F, color = 'gray60', linetype = 'dotted') +
  geom_sf(data = capital, inherit.aes = F, fill = NA, shape = 5, size = 3,
          stroke = 2, color = 'black') +
  geom_sf(data = diamonds, inherit.aes = F, color = 'black', shape = 18,
          size = 2.5) +
  geom_sf(alpha = .4) +
  coord_sf(crs = st_crs(mw_data_sf_RUF), datum = NA) +
  scale_color_grey(name = '') +
  scale_fill_grey(name = '') +
  theme_rw() +
  theme(legend.position = 'bottom') +
  guides(fill = guide_legend(nrow = 1, title.position = 'left'),
         color = guide_legend(nrow = 1, title.position = 'left'))


## figure 5 a and b ####

## split RUF attacks data in half based on diamond distance
diamond_near <- mw_data_RUF %>% filter(dia_dist < median(dia_dist))
diamond_far <- mw_data_RUF %>% filter(dia_dist >= median(dia_dist))

## get number of civilan targeting events near diamond mines
diamond_near_count <- diamond_near %>% filter(EVENT_TYPE == 'Violence against civilians') %>%
  nrow()
diamond_far_count <- diamond_far %>% filter(EVENT_TYPE == 'Violence against civilians') %>%
  nrow()

## carry out matched wake analysis; past treatment control control; weighted; near
mw_RUF_dia_near_wg_tcm <- mwa::matchedwake(diamond_near, t_window = c(5, 60, 5),
                                           spat_window = c(2, 10, 2),
                                           treatment = c('EVENT_TYPE',
                                                         'Battle-Non-state actor overtakes territory'),
                                           control = c('EVENT_TYPE',
                                                       'Battle-No change of territory'),
                                           dependent = c('EVENT_TYPE',
                                                         'Violence against civilians'),
                                           matchColumns = c('pop_dens', 'nl', 'cap_dist',
                                                            'road_dist', 'dia_dist'),
                                           TCM = T, weighted = T, alpha2 = .05)

## carry out matched wake analysis; past treatment control control; weighted; far
mw_RUF_dia_far_wg_tcm <- mwa::matchedwake(diamond_far, t_window = c(5, 60, 5),
                                          spat_window = c(2, 10, 2),
                                          treatment = c('EVENT_TYPE',
                                                        'Battle-Non-state actor overtakes territory'),
                                          control = c('EVENT_TYPE',
                                                      'Battle-No change of territory'),
                                          dependent = c('EVENT_TYPE',
                                                        'Violence against civilians'),
                                          matchColumns = c('pop_dens', 'nl', 'cap_dist',
                                                           'road_dist', 'dia_dist'),
                                          TCM = T, weighted = T, alpha2 = .05)

mwa:::plot.matchedwake(mw_RUF_dia_near_wg_tcm )
mwa:::plot.matchedwake(mw_RUF_dia_far_wg_tcm )

## figure 6 a and b ###3

## split RUF attacks data in half based on capital distance
mw_data_RUF_near <- mw_data_RUF %>% filter(cap_dist < median(cap_dist))
mw_data_RUF_far <- mw_data_RUF %>% filter(cap_dist >= median(cap_dist))

## get number of civilan targeting events near diamond mines
mw_data_RUF_near_count <- mw_data_RUF_near %>% filter(EVENT_TYPE == 'Violence against civilians') %>%
  nrow()
mw_data_RUF_far_count <- mw_data_RUF_far %>% filter(EVENT_TYPE == 'Violence against civilians') %>%
  nrow()

## carry out matched wake analysis; past treatment control control; weighted; near
mw_RUF_near_wg_tcm <- mwa::matchedwake(mw_data_RUF_near, t_window = c(5, 60, 5),
                                       spat_window = c(2, 10, 2),
                                       treatment = c('EVENT_TYPE',
                                                     'Battle-Non-state actor overtakes territory'),
                                       control = c('EVENT_TYPE',
                                                   'Battle-No change of territory'),
                                       dependent = c('EVENT_TYPE',
                                                     'Violence against civilians'),
                                       matchColumns = c('pop_dens', 'nl', 'cap_dist',
                                                        'road_dist', 'dia_dist'),
                                       TCM = T, weighted = T, alpha2 = .05)

## carry out matched wake analysis; past treatment control control; weighted; far
mw_RUF_far_wg_tcm <- mwa::matchedwake(mw_data_RUF_far, t_window = c(5, 60, 5),
                                      spat_window = c(2, 10, 2),
                                      treatment = c('EVENT_TYPE',
                                                    'Battle-Non-state actor overtakes territory'),
                                      control = c('EVENT_TYPE',
                                                  'Battle-No change of territory'),
                                      dependent = c('EVENT_TYPE',
                                                    'Violence against civilians'),
                                      matchColumns = c('pop_dens', 'nl', 'cap_dist',
                                                       'road_dist', 'dia_dist'),
                                      TCM = T, weighted = T, alpha2 = .05)

mwa:::plot.matchedwake(mw_RUF_near_wg_tcm)
mwa:::plot.matchedwake(mw_RUF_far_wg_tcm)

## RUF civilian targeting quantile in-text p. 6 ####

## calculate total fatalities in each actor-civilian dyad
OSV <- OSV %>% group_by(conflict_id) %>% summarize(fatalities = sum(best_fatality_estimate)) %>% arrange(fatalities)

## calculate quantile of RUF civilian targeting
RUF_quantile <- which(OSV$conflict_id == GED %>% filter(actor_tax == 'RUF') %>%
                        dplyr::select(conflict_n) %>%
                        unique() %>% as.numeric) / nrow(OSV)
